Exploratory Data Analysis: Offensive Efficiency in the Modern NBA

The modern NBA has steadily evolved toward higher offensive efficiency, with a clear acceleration in the last decade. Using league-average annual data from 1980–2025, I track Offensive Rating (ORtg), Pace, and 3-Point Attempt Rate (3PAr), then layer in attendance to capture COVID’s shock, and finally use weekly sports-betting stocks as a compact example of seasonal decomposition.

Code
library(tidyverse)
library(ggplot2)
library(forecast)
library(astsa)
library(xts)
library(tseries)
library(fpp2)
library(fma)
library(lubridate)
library(TSstudio)
library(readr)
library(dplyr)
library(patchwork)
library(seasonal)
library(GGally)

# Use one base theme for every ggplot in this document.
theme_set(theme_minimal(base_size = 12))

# list.files() interprets `pattern` as a regular expression, not a shell glob,
# so match the ".csv" extension explicitly and anchor it to the end of the name.
all_adv_files <- list.files("data/adv_stats", pattern = "\\.csv$", full.names = TRUE)
if (length(all_adv_files) == 0) {
    stop("No CSV files found in data/adv_stats", call. = FALSE)
}

# Read every per-season file and tag its rows with the season's ending year,
# taken from the "YYYY-YY" token in the file name (first four digits + 1).
all_adv_data <- map_df(all_adv_files, function(file) {
    season_str <- str_extract(basename(file), "\\d{4}-\\d{2}")
    season_year <- as.numeric(str_sub(season_str, 1, 4)) + 1

    df <- read_csv(file, show_col_types = FALSE)
    df$Season <- season_year
    df
})

# One row per season: the NA-ignoring mean of each advanced-stat column,
# exposed under short metric names.
league_avg <- all_adv_data %>%
    select(
        Season,
        ORtg = `Unnamed: 10_level_0_ORtg`,
        DRtg = `Unnamed: 11_level_0_DRtg`,
        Pace = `Unnamed: 13_level_0_Pace`,
        `3PAr` = `Unnamed: 15_level_0_3PAr`,
        `TS%` = `Unnamed: 16_level_0_TS%`,
        `eFG%` = `Offense Four Factors_eFG%`
    ) %>%
    group_by(Season) %>%
    summarise(across(everything(), ~ mean(.x, na.rm = TRUE)), .groups = "drop")

1. Offensive Rating (ORtg)

1.1 Time Series Visualization

Code
# Annual ORtg as a ts object, anchored at the first season present in the data
# instead of a hard-coded year.
ts_ortg <- ts(league_avg$ORtg, start = min(league_avg$Season), frequency = 1)

# Plotting frame: one row per season with an era label.
# case_when() takes the first matching branch, so the second test only needs
# the upper bound.
df_ortg <- data.frame(
    Year = league_avg$Season,
    Value = league_avg$ORtg,
    Era = case_when(
        league_avg$Season < 2012 ~ "Pre-Analytics Era",
        league_avg$Season < 2020 ~ "Analytics Era",
        league_avg$Season >= 2020 ~ "Post-COVID Era"
    )
)

# Line geoms take `linewidth`; `size` for lines is deprecated since
# ggplot2 3.4.0. Points and text keep `size`.
ggplot(df_ortg, aes(x = Year, y = Value, color = Era)) +
    geom_line(linewidth = 1.2) +
    geom_point(size = 3) +
    geom_vline(xintercept = 2012, linetype = "dashed", color = "#f58426", linewidth = 1) +
    geom_vline(xintercept = 2020, linetype = "dashed", color = "#bec0c2", linewidth = 1) +
    annotate("text",
        x = 2012, y = 112, label = "Analytics Era\nBegins (2012)",
        hjust = -0.1, color = "#f58426", fontface = "bold", size = 3.5
    ) +
    annotate("text",
        x = 2020, y = 112, label = "COVID-19\n(2020)",
        hjust = 1.1, color = "#bec0c2", fontface = "bold", size = 3.5
    ) +
    scale_color_manual(values = c(
        "Pre-Analytics Era" = "#006bb6",
        "Analytics Era" = "#f58426",
        "Post-COVID Era" = "#bec0c2"
    )) +
    labs(
        title = "NBA Offensive Rating (1980-2025): Evolution of Scoring Efficiency",
        x = "Season",
        y = "Offensive Rating (ORtg)",
        color = "Era"
    ) +
    theme_minimal(base_size = 12) +
    theme(
        plot.title = element_text(face = "bold", size = 14),
        plot.subtitle = element_text(size = 11, color = "gray40"),
        legend.position = "bottom"
    )

1.2 Lag Plots

Code
# Scatter ORtg against its own lagged values (9 lags, points only).
gglagplot(ts_ortg, lags = 9, do.lines = FALSE) +
    labs(title = "Lag Plot of Offensive Rating (ORtg)") +
    theme_minimal()

1.3 ACF and PACF Analysis

Code
# Correlograms of the ORtg level series, out to 20 lags.
pacf_ortg <- ggPacf(ts_ortg, lag.max = 20) +
    ggtitle("PACF of Offensive Rating (ORtg)") +
    theme_minimal()
acf_ortg <- ggAcf(ts_ortg, lag.max = 20) +
    ggtitle("ACF of Offensive Rating (ORtg)") +
    theme_minimal()

# patchwork: ACF panel stacked above the PACF panel.
acf_ortg / pacf_ortg

1.4 Augmented Dickey-Fuller Test

Code
# Augmented Dickey-Fuller test on the ORtg level series. The alternative
# hypothesis is stationarity, so a large p-value means a unit root cannot be
# rejected.
adf_ortg <- adf.test(ts_ortg)
print(adf_ortg)

    Augmented Dickey-Fuller Test

data:  ts_ortg
Dickey-Fuller = -1.0264, Lag order = 3, p-value = 0.9233
alternative hypothesis: stationary

1.5 Trend Decomposition - Additive Model

Code
# Data frame for the decomposition. time() returns a ts object; convert it to
# plain numeric so loess() and ggplot2 both see an ordinary continuous column.
df_ortg_decomp <- data.frame(
    Year = as.numeric(time(ts_ortg)),
    Value = as.numeric(ts_ortg)
)

# Additive decomposition for an annual (non-seasonal) series:
# trend = LOESS fit, irregular = observed - trend.
df_ortg_decomp$Trend <- predict(loess(Value ~ Year, data = df_ortg_decomp, span = 0.3))
df_ortg_decomp$Irregular <- df_ortg_decomp$Value - df_ortg_decomp$Trend

# Top panel: observed series with the fitted trend overlaid.
# (`linewidth` replaces the deprecated `size` for line geoms.)
p1 <- ggplot(df_ortg_decomp, aes(x = Year)) +
    geom_line(aes(y = Value, color = "Original"), linewidth = 1) +
    geom_line(aes(y = Trend, color = "Trend"), linewidth = 1.2) +
    scale_color_manual(values = c("Original" = "#006bb6", "Trend" = "#f58426")) +
    labs(title = "ORtg: Original Series vs. Trend (Additive Decomposition)", y = "Offensive Rating") +
    theme_minimal() +
    theme(legend.title = element_blank())

# Bottom panel: residuals around a zero reference line.
p2 <- ggplot(df_ortg_decomp, aes(x = Year, y = Irregular)) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
    geom_line(color = "#000000", linewidth = 0.8) +
    geom_point(color = "#000000", size = 2) +
    labs(title = "ORtg: Irregular Component (Additive Residuals)", y = "Residual (points)") +
    theme_minimal()

p1 / p2

1.6 Differencing for Stationarity

Code
# First difference of ORtg (year-over-year change).
diff_ortg <- diff(ts_ortg, differences = 1)

# Stack the level and differenced series; par() returns the previous settings,
# which are restored afterwards so later base-graphics plots are not left in a
# 2x1 layout.
old_par <- par(mfrow = c(2, 1))
plot(ts_ortg, main = "Original ORtg Series", ylab = "ORtg", xlab = "Year")
plot(diff_ortg, main = "First Differenced ORtg Series", ylab = "Change in ORtg", xlab = "Year")
par(old_par)

Code
# Correlograms of the first-differenced ORtg series, out to 20 lags.
pacf_diff_ortg <- ggPacf(diff_ortg, lag.max = 20) +
    ggtitle("PACF of First Differenced ORtg") +
    theme_minimal()
acf_diff_ortg <- ggAcf(diff_ortg, lag.max = 20) +
    ggtitle("ACF of First Differenced ORtg") +
    theme_minimal()

# patchwork: ACF on top, PACF below.
acf_diff_ortg / pacf_diff_ortg

Code
# ADF test on the first-differenced ORtg series (alternative hypothesis:
# stationary). Compare the p-value with the chosen significance level before
# concluding that one difference is enough.
adf_diff_ortg <- adf.test(diff_ortg)
print(adf_diff_ortg)

    Augmented Dickey-Fuller Test

data:  diff_ortg
Dickey-Fuller = -3.174, Lag order = 3, p-value = 0.109
alternative hypothesis: stationary

ORtg, points per 100 possessions, is the primary outcome. The long-run trend is unambiguously upward but non-linear: a slow climb through the 1980s–2000s, then a pronounced step-up beginning around 2012, and continued gains into the post-COVID years. Autocorrelation patterns (slow ACF decay and a PACF spike at lag 1) and the ADF test (p = 0.92) indicate that ORtg is non-stationary in levels. First-differencing moves the series much closer to stationarity, but the ADF test on the differenced series (p = 0.109) still does not reject a unit root at the 5% level, so stationarity after one difference is suggested rather than confirmed; with fewer than 50 annual observations the test has limited power. Variance is roughly constant, so an additive structure fits. A simple LOESS trend explains nearly all variation, with small residuals. This implies that the story is primarily about a structural trend rather than short-cycle oscillations.

2. Mediating Variable: Pace

2.1 Time Series Visualization

Code
# Annual Pace as a ts object, anchored at the first season present in the data
# instead of a hard-coded year.
ts_pace <- ts(league_avg$Pace, start = min(league_avg$Season), frequency = 1)

# Plotting frame; era labels are reused from the ORtg frame, which is built
# from the same league_avg rows.
df_pace <- data.frame(
    Year = league_avg$Season,
    Value = league_avg$Pace,
    Era = df_ortg$Era
)

# Line geoms take `linewidth`; `size` for lines is deprecated since
# ggplot2 3.4.0.
ggplot(df_pace, aes(x = Year, y = Value, color = Era)) +
    geom_line(linewidth = 1.2) +
    geom_point(size = 3) +
    geom_vline(xintercept = 2012, linetype = "dashed", color = "#f58426", linewidth = 1) +
    scale_color_manual(values = c(
        "Pre-Analytics Era" = "#006bb6",
        "Analytics Era" = "#f58426",
        "Post-COVID Era" = "#bec0c2"
    )) +
    labs(
        title = "NBA Pace (1980-2025): Possessions Per 48 Minutes",
        x = "Season",
        y = "Pace (Possessions per 48 min)",
        color = "Era"
    ) +
    theme_minimal(base_size = 12) +
    theme(
        plot.title = element_text(face = "bold", size = 14),
        plot.subtitle = element_text(size = 11, color = "gray40"),
        legend.position = "bottom"
    )

2.2 Lag Plots

Code
# Scatter Pace against its own lagged values (9 lags, points only).
gglagplot(ts_pace, lags = 9, do.lines = FALSE) +
    labs(title = "Lag Plot of Pace") +
    theme_minimal()

2.3 ACF and PACF Analysis

Code
# Correlograms of the Pace level series, out to 20 lags.
pacf_pace <- ggPacf(ts_pace, lag.max = 20) +
    ggtitle("PACF of Pace") +
    theme_minimal()
acf_pace <- ggAcf(ts_pace, lag.max = 20) +
    ggtitle("ACF of Pace") +
    theme_minimal()

# patchwork: ACF on top, PACF below.
acf_pace / pacf_pace

2.3 Stationarity Testing

Code
# Augmented Dickey-Fuller test on the Pace level series. The alternative
# hypothesis is stationarity, so a large p-value means a unit root cannot be
# rejected.
adf_pace <- adf.test(ts_pace)
print(adf_pace)

    Augmented Dickey-Fuller Test

data:  ts_pace
Dickey-Fuller = -1.4007, Lag order = 3, p-value = 0.8116
alternative hypothesis: stationary
Code
# First difference of Pace (year-over-year change).
diff_pace <- diff(ts_pace, differences = 1)

# Stack the level and differenced series, then restore the previous par()
# settings so the 2x1 layout does not leak into later base-graphics plots.
old_par <- par(mfrow = c(2, 1))
plot(ts_pace, main = "Original Pace Series", ylab = "Pace", xlab = "Year")
plot(diff_pace, main = "First Differenced Pace Series", ylab = "Change in Pace", xlab = "Year")
par(old_par)

Code
# Correlograms of the first-differenced Pace series, out to 20 lags.
pacf_diff_pace <- ggPacf(diff_pace, lag.max = 20) +
    ggtitle("PACF of First Differenced Pace") +
    theme_minimal()
acf_diff_pace <- ggAcf(diff_pace, lag.max = 20) +
    ggtitle("ACF of First Differenced Pace") +
    theme_minimal()

# patchwork: ACF on top, PACF below.
acf_diff_pace / pacf_diff_pace

Code
# ADF test on the first-differenced Pace series (alternative hypothesis:
# stationary). Compare the p-value with the chosen significance level before
# concluding that one difference is enough.
adf_diff_pace <- adf.test(diff_pace)
print(adf_diff_pace)

    Augmented Dickey-Fuller Test

data:  diff_pace
Dickey-Fuller = -2.9769, Lag order = 3, p-value = 0.187
alternative hypothesis: stationary

2.4 Moving Average Smoothing

Code
# Moving averages of Pace at three window lengths.
ma_pace_3 <- ma(ts_pace, order = 3) # short-term: 3 seasons
ma_pace_5 <- ma(ts_pace, order = 5) # medium-term: 5 seasons
ma_pace_10 <- ma(ts_pace, order = 10) # long-term: 10 seasons

# Overlay the raw series and the three smoothers; `breaks` fixes the legend order.
autoplot(ts_pace, series = "Original") +
    autolayer(ma_pace_3, series = "MA(3)") +
    autolayer(ma_pace_5, series = "MA(5)") +
    autolayer(ma_pace_10, series = "MA(10)") +
    ggtitle(
        "Pace: Moving Average Smoothing Comparison",
        subtitle = "U-shaped trajectory becomes clearer with increased smoothing"
    ) +
    xlab("Season") +
    ylab("Pace (possessions per 48 min)") +
    labs(color = "Series") +
    scale_color_manual(
        breaks = c("Original", "MA(3)", "MA(5)", "MA(10)"),
        values = c(
            "Original" = "gray60",
            "MA(3)" = "#006bb6",
            "MA(5)" = "#f58426",
            "MA(10)" = "#000000"
        )
    ) +
    theme_minimal(base_size = 12) +
    theme(
        legend.position = "bottom",
        plot.title = element_text(size = 14, face = "bold"),
        plot.subtitle = element_text(size = 11, color = "gray40")
    )

Pace, the mediator in this story, follows a different trajectory: a classic U-shape. Possessions per 48 minutes decline from fast 1980s basketball to a trough in the mid-2000s, then recover through the 2010s and 2020s. Importantly, the Pace recovery begins before the analytics inflection, suggesting it is not simply a byproduct of analytics. Like ORtg, Pace is non-stationary in levels (ADF p = 0.81); after first-differencing the ADF p-value falls to 0.187, which still does not reject a unit root at the 5% level, so stationarity in first differences is plausible but not formally confirmed on this short annual sample. Moving-average smoothers with 5–10 year windows make the U-shape especially clear. This matters because it means the efficiency gains cannot be reduced to “more possessions”.

3. Key Independent Variable: 3-Point Attempt Rate (3PAr)

3.1 Time Series Visualization

Code
# Annual 3PAr as a ts object, anchored at the first season present in the data
# instead of a hard-coded year.
ts_3par <- ts(league_avg$`3PAr`, start = min(league_avg$Season), frequency = 1)

# Plotting frame; era labels are reused from the ORtg frame, which is built
# from the same league_avg rows.
df_3par <- data.frame(
    Year = league_avg$Season,
    Value = league_avg$`3PAr`,
    Era = df_ortg$Era
)

# Line geoms take `linewidth`; `size` for lines is deprecated since
# ggplot2 3.4.0. The y axis is formatted as a percentage.
ggplot(df_3par, aes(x = Year, y = Value, color = Era)) +
    geom_line(linewidth = 1.2) +
    geom_point(size = 3) +
    geom_vline(xintercept = 2012, linetype = "dashed", color = "#f58426", linewidth = 1) +
    annotate("text",
        x = 2012, y = 0.44, label = "Analytics Era Begins",
        hjust = -0.05, color = "#f58426", fontface = "bold", size = 3.5
    ) +
    scale_color_manual(values = c(
        "Pre-Analytics Era" = "#006bb6",
        "Analytics Era" = "#f58426",
        "Post-COVID Era" = "#bec0c2"
    )) +
    scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
    labs(
        title = "NBA 3-Point Attempt Rate (1980-2025)",
        subtitle = "Percentage of field goal attempts that are three-pointers",
        x = "Season",
        y = "3-Point Attempt Rate (3PAr)",
        color = "Era"
    ) +
    theme_minimal(base_size = 12) +
    theme(
        plot.title = element_text(face = "bold", size = 14),
        plot.subtitle = element_text(size = 11, color = "gray40"),
        legend.position = "bottom"
    )

3.2 Lag Plots

Code
# Scatter 3PAr against its own lagged values (9 lags, points only).
gglagplot(ts_3par, lags = 9, do.lines = FALSE) +
    labs(title = "Lag Plot of 3-Point Attempt Rate (3PAr)") +
    theme_minimal()

3.3 Stationarity Analysis

Code
# Correlograms of the 3PAr level series, out to 20 lags.
pacf_3par <- ggPacf(ts_3par, lag.max = 20) +
    ggtitle("PACF of 3PAr") +
    theme_minimal()
acf_3par <- ggAcf(ts_3par, lag.max = 20) +
    ggtitle("ACF of 3PAr") +
    theme_minimal()

# patchwork: ACF on top, PACF below.
acf_3par / pacf_3par

Code
# Augmented Dickey-Fuller test on the 3PAr level series. The alternative
# hypothesis is stationarity, so a large p-value means a unit root cannot be
# rejected.
adf_3par <- adf.test(ts_3par)
print(adf_3par)

    Augmented Dickey-Fuller Test

data:  ts_3par
Dickey-Fuller = -1.3536, Lag order = 3, p-value = 0.8303
alternative hypothesis: stationary
Code
# First difference of 3PAr (year-over-year change).
diff_3par <- diff(ts_3par, differences = 1)

# Stack the level and differenced series, then restore the previous par()
# settings so the 2x1 layout does not leak into later base-graphics plots.
old_par <- par(mfrow = c(2, 1))
plot(ts_3par, main = "Original 3PAr Series", ylab = "3PAr", xlab = "Year")
plot(diff_3par, main = "First Differenced 3PAr Series", ylab = "Change in 3PAr", xlab = "Year")
par(old_par)

Code
# Correlograms of the first-differenced 3PAr series, out to 20 lags.
pacf_diff_3par <- ggPacf(diff_3par, lag.max = 20) +
    ggtitle("PACF of First Differenced 3PAr") +
    theme_minimal()
acf_diff_3par <- ggAcf(diff_3par, lag.max = 20) +
    ggtitle("ACF of First Differenced 3PAr") +
    theme_minimal()

# patchwork: ACF on top, PACF below.
acf_diff_3par / pacf_diff_3par

Code
# ADF test on the first-differenced 3PAr series (alternative hypothesis:
# stationary). Compare the p-value with the chosen significance level before
# concluding that one difference is enough.
adf_diff_3par <- adf.test(diff_3par)
print(adf_diff_3par)

    Augmented Dickey-Fuller Test

data:  diff_3par
Dickey-Fuller = -3.5956, Lag order = 3, p-value = 0.04462
alternative hypothesis: stationary

3.4 Moving Average Smoothing for 3PAr

Code
# Moving averages of 3PAr at three window lengths.
ma_3par_3 <- ma(ts_3par, order = 3) # short-term: 3 seasons
ma_3par_5 <- ma(ts_3par, order = 5) # medium-term: 5 seasons
ma_3par_10 <- ma(ts_3par, order = 10) # long-term: 10 seasons

# Overlay the raw series and the three smoothers on a percent-formatted axis;
# `breaks` fixes the legend order.
autoplot(ts_3par, series = "Original") +
    autolayer(ma_3par_3, series = "MA(3)") +
    autolayer(ma_3par_5, series = "MA(5)") +
    autolayer(ma_3par_10, series = "MA(10)") +
    ggtitle(
        "3-Point Attempt Rate: Moving Average Smoothing Comparison",
        subtitle = "Analytics revolution's exponential growth pattern clearly visible"
    ) +
    xlab("Season") +
    ylab("3-Point Attempt Rate (3PAr)") +
    labs(color = "Series") +
    scale_color_manual(
        breaks = c("Original", "MA(3)", "MA(5)", "MA(10)"),
        values = c(
            "Original" = "gray60",
            "MA(3)" = "#006bb6",
            "MA(5)" = "#f58426",
            "MA(10)" = "#000000"
        )
    ) +
    scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
    theme_minimal(base_size = 12) +
    theme(
        legend.position = "bottom",
        plot.title = element_text(size = 14, face = "bold"),
        plot.subtitle = element_text(size = 11, color = "gray40")
    )

The strongest structural break appears in 3PAr, which measures the share of shots taken from three. 3PAr rises modestly for decades and then accelerates sharply around 2012, around the same period where ORtg takes off. Lag plots show strong positive relationships across lags, and ACF/PACF behavior again indicates a trending series (non-stationary levels; stationary first differences, ADF p = 0.045). Smoothing highlights two regimes: a gradual era up to around 2012 and a rapid, near-exponential climb thereafter. This timing alignment supports the hypothesis that shot selection modernization (spacing, threes above the break, rim attempts enabled by space) is tightly coupled to league-wide efficiency gains.

4. Attendance: COVID-19 Impact Analysis

4.1 Time Series Visualization

Code
# Attendance per season: the sum of the attendance column over all rows of the
# season (presumably one row per team - confirm against the source files) and
# the mean of the attendance-per-game column. Restricted to 1990 onward.
attendance_data <- all_adv_data %>%
    group_by(Season) %>%
    summarise(
        Total_Attendance = sum(`Unnamed: 29_level_0_Attend.`, na.rm = TRUE),
        Avg_Attendance = mean(`Unnamed: 30_level_0_Attend./G`, na.rm = TRUE),
        .groups = "drop"
    ) %>%
    filter(Season >= 1990)

# Anchor the ts at the first season that survived the filter rather than a
# hard-coded 1990, so the time axis stays aligned if early seasons are absent.
ts_attendance <- ts(
    attendance_data$Total_Attendance,
    start = min(attendance_data$Season),
    frequency = 1
)
Code
# Plotting frame: attendance in millions with a COVID-phase label per season.
# case_when() takes the first matching branch, so the second test only needs
# the upper bound.
df_attendance <- data.frame(
    Year = attendance_data$Season,
    Value = attendance_data$Total_Attendance / 1e6, # Convert to millions
    Era = case_when(
        attendance_data$Season < 2020 ~ "Pre-COVID",
        attendance_data$Season < 2022 ~ "COVID Era",
        attendance_data$Season >= 2022 ~ "Post-COVID Recovery"
    )
)

# Line geoms take `linewidth`; `size` for lines is deprecated since
# ggplot2 3.4.0. The shaded rectangle marks the 2020-2021 seasons.
ggplot(df_attendance, aes(x = Year, y = Value, color = Era)) +
    geom_line(linewidth = 1.2) +
    geom_point(size = 3) +
    geom_vline(xintercept = 2020, linetype = "dashed", color = "red", linewidth = 1) +
    annotate("text",
        x = 2020, y = 24, label = "COVID-19\nPandemic (2020)",
        hjust = -0.05, color = "red", fontface = "bold", size = 3.5
    ) +
    annotate("rect",
        xmin = 2020, xmax = 2021, ymin = 0, ymax = 25,
        alpha = 0.1, fill = "red"
    ) +
    scale_color_manual(values = c(
        "Pre-COVID" = "#006bb6",
        "COVID Era" = "#d62728",
        "Post-COVID Recovery" = "#2ca02c"
    )) +
    labs(
        title = "NBA Total Attendance (1990-2025): COVID-19 Disruption and Recovery",
        subtitle = "90% collapse in 2020-21 followed by gradual recovery",
        x = "Season",
        y = "Total Attendance (Millions)",
        color = "Era"
    ) +
    theme_minimal(base_size = 12) +
    theme(
        plot.title = element_text(face = "bold", size = 14),
        plot.subtitle = element_text(size = 11, color = "gray40"),
        legend.position = "bottom"
    )

4.2 Lag Plots

Code
# Scatter total attendance against its own lagged values (9 lags, points only).
gglagplot(ts_attendance, lags = 9, do.lines = FALSE) +
    labs(title = "Lag Plot of Total Attendance") +
    theme_minimal()

4.3 ACF and PACF Analysis

Code
# Correlograms of total attendance, out to 15 lags.
pacf_attendance <- ggPacf(ts_attendance, lag.max = 15) +
    ggtitle("PACF of Total Attendance") +
    theme_minimal()
acf_attendance <- ggAcf(ts_attendance, lag.max = 15) +
    ggtitle("ACF of Total Attendance") +
    theme_minimal()

# patchwork: ACF on top, PACF below.
acf_attendance / pacf_attendance

4.4 Moving Average Smoothing for Attendance

Code
# Moving averages of total attendance (kept in raw head-count units).
ma_attendance_3 <- ma(ts_attendance, order = 3)
ma_attendance_5 <- ma(ts_attendance, order = 5)

# ts_attendance holds raw counts, while the axis is labelled in millions, so
# every plotted series is scaled by 1e6 to make the label true.
autoplot(ts_attendance / 1e6, series = "Original") +
    autolayer(ma_attendance_3 / 1e6, series = "MA(3)") +
    autolayer(ma_attendance_5 / 1e6, series = "MA(5)") +
    scale_color_manual(
        values = c(
            "Original" = "gray60",
            "MA(3)" = "#006bb6",
            "MA(5)" = "#f58426"
        ),
        breaks = c("Original", "MA(3)", "MA(5)")
    ) +
    labs(
        title = "Attendance: Moving Average Smoothing (COVID Shock Visible)",
        subtitle = "Smoothing cannot remove the dramatic 2020-21 disruption",
        y = "Total Attendance (millions)",
        x = "Season",
        color = "Series"
    ) +
    theme_minimal(base_size = 12) +
    theme(
        plot.title = element_text(face = "bold", size = 14),
        plot.subtitle = element_text(size = 11, color = "gray40"),
        legend.position = "bottom"
    )

Attendance provides the counterpoint: a stable pre-COVID plateau around ~21–22 million through 2019, a 2020–21 collapse during the bubble/limited-capacity seasons, and a partial recovery that remains below the pre-pandemic ceiling. The sharp, short-window discontinuity is an unaccounted-for shock rather than a new equilibrium. Even with 3–5 year moving averages, the COVID impact is too large to smooth away.

5. Financial Data: Sports Betting Stocks - Time Series with Seasonality

5.1 Data Preparation and Time Series Creation

Code
# Load daily price files for the four sports-betting stocks and make sure the
# Date column is of class Date.
dkng <- read_csv("data/financial/DKNG_daily.csv", show_col_types = FALSE) %>% mutate(Date = as.Date(Date))
penn <- read_csv("data/financial/PENN_daily.csv", show_col_types = FALSE) %>% mutate(Date = as.Date(Date))
mgm <- read_csv("data/financial/MGM_daily.csv", show_col_types = FALSE) %>% mutate(Date = as.Date(Date))
czr <- read_csv("data/financial/CZR_daily.csv", show_col_types = FALSE) %>% mutate(Date = as.Date(Date))

# cat() drops the Date class and prints the underlying day count since
# 1970-01-01, so format() the dates to get readable ISO strings.
cat("DKNG:", nrow(dkng), "days |", format(min(dkng$Date)), "to", format(max(dkng$Date)), "\n")
DKNG: 1181 days | 2020-04-23 to 2024-12-31 
Code
cat("PENN:", nrow(penn), "days |", format(min(penn$Date)), "to", format(max(penn$Date)), "\n")
PENN: 1258 days | 2020-01-02 to 2024-12-31 
Code
cat("MGM:", nrow(mgm), "days |", format(min(mgm$Date)), "to", format(max(mgm$Date)), "\n")
MGM: 1258 days | 2020-01-02 to 2024-12-31 
Code
cat("CZR:", nrow(czr), "days |", format(min(czr$Date)), "to", format(max(czr$Date)), "\n")
CZR: 1258 days | 2020-01-02 to 2024-12-31 
Code
# Build a weekly time series (frequency = 52) of mean adjusted close.
#
# df:     data frame with a `Date` column (class Date) and an `Adj Close` column.
# ticker: stock symbol; accepted for call-site readability, not used in the
#         computation.
# Returns a ts of weekly mean `Adj Close`, starting at the first
# (year, week) present in `df`.
create_weekly_ts <- function(df, ticker) {
    weekly <- df %>%
        # lubridate::week() labels the last one or two days of a year as
        # week 53. With frequency = 52 that extra bucket would add a 53rd
        # point to the year and shift every later observation, so fold it
        # into week 52.
        mutate(Year = year(Date), Week = pmin(week(Date), 52)) %>%
        group_by(Year, Week) %>%
        summarise(Avg_Close = mean(`Adj Close`, na.rm = TRUE), .groups = "drop") %>%
        arrange(Year, Week)

    # NOTE(review): a calendar week with no rows in `df` would still shift the
    # index; assumed not to occur for daily trading data - confirm.
    start_year <- min(weekly$Year)
    start_week <- min(weekly$Week[weekly$Year == start_year])
    ts(weekly$Avg_Close, start = c(start_year, start_week), frequency = 52)
}

ts_dkng <- create_weekly_ts(dkng, "DKNG")
ts_penn <- create_weekly_ts(penn, "PENN")
ts_mgm <- create_weekly_ts(mgm, "MGM")
ts_czr <- create_weekly_ts(czr, "CZR")

5.2 Comparative Visualization: All Four Betting Stocks

Code
# Compare the four stocks on a common scale: each series is divided by its own
# first observation and multiplied by 100, so every line starts at 100.
autoplot(ts_dkng / ts_dkng[1] * 100, series = "DKNG") +
    autolayer(ts_penn / ts_penn[1] * 100, series = "PENN") +
    autolayer(ts_mgm / ts_mgm[1] * 100, series = "MGM") +
    autolayer(ts_czr / ts_czr[1] * 100, series = "CZR") +
    ggtitle(
        "Sports Betting Stocks: Normalized Performance (2020-2024)",
        subtitle = "Indexed to 100 at each stock's start date | Boom-bust-stabilization pattern"
    ) +
    xlab("Year") +
    ylab("Normalized Price (Start = 100)") +
    labs(color = "Stock") +
    scale_color_manual(values = c("DKNG" = "#006bb6", "PENN" = "#f58426", "MGM" = "#00a94f", "CZR" = "#c8102e")) +
    theme_minimal(base_size = 12) +
    theme(legend.position = "bottom", plot.title = element_text(face = "bold"))

5.3 DKNG Detailed Analysis

Code
# Weekly DKNG price with the first half of 2021 shaded and labelled.
autoplot(ts_dkng) +
    annotate("rect", xmin = 2021, xmax = 2021.5, ymin = 0, ymax = 70, alpha = 0.1, fill = "orange") +
    annotate("text", x = 2021.25, y = 65, label = "Peak Boom", color = "orange", fontface = "bold", size = 3) +
    ggtitle(
        "DraftKings (DKNG) Weekly Stock Price (2020-2024)",
        subtitle = "IPO boom during COVID → correction → stabilization"
    ) +
    xlab("Year") +
    ylab("Avg Weekly Adj Close ($)") +
    theme_minimal(base_size = 12) +
    theme(plot.title = element_text(size = 14, face = "bold"))

5.4 Seasonal Decomposition (DKNG)

Code
# Classical decomposition of weekly DKNG prices under a multiplicative model.
decomp_dkng <- decompose(ts_dkng, type = "multiplicative")

# One panel per component.
autoplot(decomp_dkng) +
    ggtitle("DKNG Stock: Seasonal Decomposition (Multiplicative Model)") +
    theme_minimal() +
    theme(plot.title = element_text(size = 14, face = "bold"))

5.5 Moving Average Smoothing (DKNG)

Code
# Moving averages of weekly DKNG prices; window lengths are in weeks.
ma_dkng_4 <- ma(ts_dkng, order = 4) # roughly one month
ma_dkng_13 <- ma(ts_dkng, order = 13) # roughly one quarter
ma_dkng_52 <- ma(ts_dkng, order = 52) # one year

# Overlay the raw series and the three smoothers; `breaks` fixes the legend order.
autoplot(ts_dkng, series = "Original") +
    autolayer(ma_dkng_4, series = "MA(4 weeks)") +
    autolayer(ma_dkng_13, series = "MA(13 weeks)") +
    autolayer(ma_dkng_52, series = "MA(52 weeks)") +
    ggtitle(
        "DKNG Stock: Moving Average Smoothing Comparison",
        subtitle = "Different windows reveal trading cycles vs long-term trends"
    ) +
    xlab("Year") +
    ylab("Stock Price ($)") +
    labs(color = "Series") +
    scale_color_manual(
        breaks = c("Original", "MA(4 weeks)", "MA(13 weeks)", "MA(52 weeks)"),
        values = c(
            "Original" = "gray60",
            "MA(4 weeks)" = "#006bb6",
            "MA(13 weeks)" = "#f58426",
            "MA(52 weeks)" = "#000000"
        )
    ) +
    theme_minimal(base_size = 12) +
    theme(
        legend.position = "bottom",
        plot.title = element_text(size = 14, face = "bold"),
        plot.subtitle = element_text(size = 11, color = "gray40")
    )

5.6 ACF and PACF (DKNG)

Code
# Correlograms of weekly DKNG prices, out to 52 lags (one year of weeks).
pacf_dkng <- ggPacf(ts_dkng, lag.max = 52) +
    ggtitle("PACF of DKNG Weekly Stock Price") +
    theme_minimal()
acf_dkng <- ggAcf(ts_dkng, lag.max = 52) +
    ggtitle("ACF of DKNG Weekly Stock Price") +
    theme_minimal()

# patchwork: ACF on top, PACF below.
acf_dkng / pacf_dkng

5.7 PENN Analysis

5.7.1 PENN Time Series Visualization

Code
# Weekly PENN price with the 2020 to mid-2021 window shaded and labelled.
autoplot(ts_penn) +
    annotate("rect", xmin = 2020, xmax = 2021.5, ymin = 0, ymax = 140, alpha = 0.1, fill = "red") +
    annotate("text", x = 2020, y = 130, label = "Covid-19", color = "red", fontface = "bold", size = 3) +
    ggtitle("Penn Entertainment (PENN) Weekly Stock Price (2020-2024)") +
    xlab("Year") +
    ylab("Avg Weekly Adj Close ($)") +
    theme_minimal(base_size = 12) +
    theme(plot.title = element_text(size = 14, face = "bold"))

5.7.2 Seasonal Decomposition (PENN)

Code
# Classical decomposition of weekly PENN prices under a multiplicative model.
decomp_penn <- decompose(ts_penn, type = "multiplicative")

# One panel per component.
autoplot(decomp_penn) +
    ggtitle("PENN Stock: Seasonal Decomposition (Multiplicative Model)") +
    theme_minimal() +
    theme(plot.title = element_text(size = 14, face = "bold"))

5.7.3 Moving Average Smoothing (PENN)

Code
# Moving averages of weekly PENN prices; window lengths are in weeks.
ma_penn_4 <- ma(ts_penn, order = 4) # roughly one month
ma_penn_13 <- ma(ts_penn, order = 13) # roughly one quarter
ma_penn_52 <- ma(ts_penn, order = 52) # one year

# Overlay the raw series and the three smoothers; `breaks` fixes the legend order.
autoplot(ts_penn, series = "Original") +
    autolayer(ma_penn_4, series = "MA(4 weeks)") +
    autolayer(ma_penn_13, series = "MA(13 weeks)") +
    autolayer(ma_penn_52, series = "MA(52 weeks)") +
    ggtitle(
        "PENN Stock: Moving Average Smoothing Comparison",
        subtitle = "Even annual smoothing cannot hide the structural collapse"
    ) +
    xlab("Year") +
    ylab("Stock Price ($)") +
    labs(color = "Series") +
    scale_color_manual(
        breaks = c("Original", "MA(4 weeks)", "MA(13 weeks)", "MA(52 weeks)"),
        values = c(
            "Original" = "gray60",
            "MA(4 weeks)" = "#006bb6",
            "MA(13 weeks)" = "#f58426",
            "MA(52 weeks)" = "#000000"
        )
    ) +
    theme_minimal(base_size = 12) +
    theme(
        legend.position = "bottom",
        plot.title = element_text(size = 14, face = "bold"),
        plot.subtitle = element_text(size = 11, color = "gray40")
    )

5.7.4 ACF and PACF (PENN)

Code
# Correlograms of weekly PENN prices, out to 52 lags (one year of weeks).
pacf_penn <- ggPacf(ts_penn, lag.max = 52) +
    ggtitle("PACF of PENN Weekly Stock Price") +
    theme_minimal()
acf_penn <- ggAcf(ts_penn, lag.max = 52) +
    ggtitle("ACF of PENN Weekly Stock Price") +
    theme_minimal()

# patchwork: ACF on top, PACF below.
acf_penn / pacf_penn

Because annual NBA series are effectively non-seasonal, I include weekly sports-betting equities to demonstrate seasonality and multiplicative decomposition. DraftKings (DKNG), Penn (PENN), MGM, and Caesars (CZR) all show pandemic-era boom-bust dynamics on weekly data. Prices are non-stationary in levels and stationary in differences, and their volatility scales with price, which implies that a multiplicative model is the appropriate choice for decomposition. DKNG exhibits a large run-up, correction, and stabilization, while PENN shows a sharper hype-driven spike and deeper collapse.

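Pulling the findings together: ORtg, Pace, and 3PAr are all non-stationary in levels. After first-differencing, only 3PAr passes the ADF test at the 5% level (p = 0.045); ORtg (p = 0.109) and Pace (p = 0.187) do not, so d = 1 is a working assumption for those series rather than a confirmed result, and Attendance was not formally tested. Because the NBA metrics show roughly constant variance around their trends, additive decomposition is appropriate for them, while multiplicative decomposition fits the weekly equities, whose volatility scales with price. Short and medium moving-average windows clarify regime shifts: the 2012 analytics inflection in ORtg/3PAr, the mid-2000s trough and rebound in Pace, and the COVID intervention in Attendance.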